Here we simulate from the true model, with the following parameters.
To have realistic values for the parameters, we estimated the values starting from the allen dataset, randomly selecting 100 cells.
For each simulated data, we fit a ZINB model with all the possible combinations (16) of the following parameters:
load("./datasets/sim_allen5.rda")
load("./datasets/sim_allen5_fitted.rda")
load("./datasets/sim_allen5_first.rda")
true_W <- simModel@W
dtrue <- as.matrix(dist(true_W))
biotrue <- as.numeric(factor(bio))
plot(true_W, pch=19, col=biotrue, main="True W", xlab="W1", ylab="W2")
plot(fit[[1]]@W, rep(0, NROW(true_W)), pch=19, col=biotrue, main="K=1", xlab="W1", ylab="")
plot(fit[[2]]@W, pch=19, col=biotrue, main="K=2", xlab="W1", ylab="W2")
pairs(fit[[3]]@W, pch=19, col=biotrue, main="K=3")
pairs(fit[[4]]@W, pch=19, col=biotrue, main="K=4")
load("./datasets/sim_allen5_bias.rda")
K <- 1:4
ggplot(bias[bias$param == 'beta_mu', ], aes(x = K, y = bias)) +
geom_boxplot() + ggtitle('beta_mu') + facet_grid(disp ~ V) +
geom_hline(yintercept = 0, col = 'red')
ggplot(bias[bias$param == 'beta_mu', ], aes(x = V, y = bias)) +
geom_boxplot() + ggtitle('beta_mu') + facet_grid(disp ~ K) +
geom_hline(yintercept = 0, col = 'red')
ggplot(bias[bias$param == 'beta_pi', ], aes(x = K, y = bias)) +
geom_boxplot() + ggtitle('beta_pi') + facet_grid(disp ~ V) +
geom_hline(yintercept = 0, col = 'red')
ggplot(bias[bias$param == 'beta_pi', ], aes(x = V, y = bias)) +
geom_boxplot() + ggtitle('beta_pi') + facet_grid(disp ~ K) +
geom_hline(yintercept = 0, col = 'red')
ggplot(bias[bias$param == 'gamma_mu', ], aes(x = K, y = bias)) +
geom_boxplot() + ggtitle('gamma_mu') + facet_grid(disp ~ V) +
geom_hline(yintercept = 0, col = 'red')
ggplot(bias[bias$param == 'gamma_mu' & bias$V == 'V', ],
aes(x = K, y = bias)) +
geom_boxplot() + ggtitle('gamma_mu') + facet_grid( ~ disp) +
geom_hline(yintercept = 0, col = 'red')
ggplot(bias[bias$param == 'gamma_pi', ], aes(x = K, y = bias)) +
geom_boxplot() + ggtitle('gamma_pi') + facet_grid(disp ~ V) +
geom_hline(yintercept = 0, col = 'red')
ggplot(bias[bias$param == 'gamma_pi' & bias$V == 'V', ],
aes(x = K, y = bias)) +
geom_boxplot() + ggtitle('gamma_pi') + facet_grid( ~ disp) +
geom_hline(yintercept = 0, col = 'red')
ggplot(bias[bias$param == 'theta', ], aes(x = K, y = bias)) +
geom_boxplot() + ggtitle('theta') + facet_grid(disp ~ V) +
geom_hline(yintercept = 0, col = 'red')
ggplot(bias[bias$param == 'theta', ],
aes(x = disp, y = bias)) +
geom_boxplot() + ggtitle('theta') + facet_grid(V ~ K) +
geom_hline(yintercept = 0, col = 'red')
ggplot(bias[bias$param == 'walpha_mu', ], aes(x = K, y = bias)) +
geom_boxplot() + ggtitle('walpha_mu') + facet_grid(disp ~ V) +
geom_hline(yintercept = 0, col = 'red')
ggplot(bias[bias$param == 'walpha_mu', ],
aes(x = interaction(disp, K, V), y = bias)) +
geom_boxplot() + ggtitle('walpha_mu') + xlab('') +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
geom_hline(yintercept = 0, col = 'red')
ggplot(bias[bias$param == 'walpha_pi', ], aes(x = K, y = bias)) +
geom_boxplot() + ggtitle('walpha_pi') + facet_grid(disp ~ V) +
geom_hline(yintercept = 0, col = 'red')
ggplot(bias[bias$param == 'walpha_pi', ],
aes(x = interaction(disp, K, V), y = bias)) +
geom_boxplot() + ggtitle('walpha_pi') + xlab('') +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
geom_hline(yintercept = 0, col = 'red')
ggplot(bias[bias$param == 'mu', ], aes(x = K, y = bias)) +
geom_boxplot() + ggtitle('mu') + facet_grid(disp ~ V) +
geom_hline(yintercept = 0, col = 'red')
ggplot(bias[bias$param == 'pi', ], aes(x = K, y = bias)) +
geom_boxplot() + ggtitle('pi') + facet_grid(disp ~ V) +
geom_hline(yintercept = 0, col = 'red')
load("./datasets/sim_allen5_variance.rda")
K <- 1:4
ggplot(variance[variance$param == 'beta_mu', ], aes(x = K, y = variance)) +
geom_boxplot() + ggtitle('beta_mu') + facet_grid(disp ~ V) +
geom_hline(yintercept = 0, col = 'red')
ggplot(variance[variance$param == 'beta_mu', ], aes(x = V, y = variance)) +
geom_boxplot() + ggtitle('beta_mu') + facet_grid(disp ~ K) +
geom_hline(yintercept = 0, col = 'red')
ggplot(variance[variance$param == 'beta_pi', ], aes(x = K, y = variance)) +
geom_boxplot() + ggtitle('beta_pi') + facet_grid(disp ~ V) +
geom_hline(yintercept = 0, col = 'red')
ggplot(variance[variance$param == 'beta_pi', ], aes(x = V, y = variance)) +
geom_boxplot() + ggtitle('beta_pi') + facet_grid(disp ~ K) +
geom_hline(yintercept = 0, col = 'red')
ggplot(variance[variance$param == 'gamma_mu', ], aes(x = K, y = variance)) +
geom_boxplot() + ggtitle('gamma_mu') + facet_grid(disp ~ V) +
geom_hline(yintercept = 0, col = 'red')
ggplot(variance[variance$param == 'gamma_mu' & variance$V == 'V', ],
aes(x = K, y = variance)) +
geom_boxplot() + ggtitle('gamma_mu') + facet_grid( ~ disp) +
geom_hline(yintercept = 0, col = 'red')
ggplot(variance[variance$param == 'gamma_pi', ], aes(x = K, y = variance)) +
geom_boxplot() + ggtitle('gamma_pi') + facet_grid(disp ~ V) +
geom_hline(yintercept = 0, col = 'red')
ggplot(variance[variance$param == 'gamma_pi' & variance$V == 'V', ],
aes(x = K, y = variance)) +
geom_boxplot() + ggtitle('gamma_pi') + facet_grid( ~ disp) +
geom_hline(yintercept = 0, col = 'red')
ggplot(variance[variance$param == 'theta', ], aes(x = K, y = variance)) +
geom_boxplot() + ggtitle('theta') + facet_grid(disp ~ V) +
geom_hline(yintercept = 0, col = 'red')
ggplot(variance[variance$param == 'theta', ],
aes(x = disp, y = variance)) +
geom_boxplot() + ggtitle('theta') + facet_grid(V ~ K) +
geom_hline(yintercept = 0, col = 'red')
ggplot(variance[variance$param == 'walpha_mu', ], aes(x = K, y = variance)) +
geom_boxplot() + ggtitle('walpha_mu') + facet_grid(disp ~ V) +
geom_hline(yintercept = 0, col = 'red')
ggplot(variance[variance$param == 'walpha_mu', ],
aes(x = interaction(disp, K, V), y = variance)) +
geom_boxplot() + ggtitle('walpha_mu') + xlab('') +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
geom_hline(yintercept = 0, col = 'red')
ggplot(variance[variance$param == 'walpha_pi', ], aes(x = K, y = variance)) +
geom_boxplot() + ggtitle('walpha_pi') + facet_grid(disp ~ V) +
geom_hline(yintercept = 0, col = 'red')
ggplot(variance[variance$param == 'walpha_pi', ],
aes(x = interaction(disp, K, V), y = variance)) +
geom_boxplot() + ggtitle('walpha_pi') + xlab('') +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
geom_hline(yintercept = 0, col = 'red')
ggplot(variance[variance$param == 'mu', ], aes(x = K, y = variance)) +
geom_boxplot() + ggtitle('mu') + facet_grid(disp ~ V) +
geom_hline(yintercept = 0, col = 'red')
ggplot(variance[variance$param == 'pi', ], aes(x = K, y = variance)) +
geom_boxplot() + ggtitle('pi') + facet_grid(disp ~ V) +
geom_hline(yintercept = 0, col = 'red')
mse = bias
colnames(mse)[1] = c('mse')
mse$mse = bias$bias^2 + variance$variance
ggplot(mse[mse$param == 'beta_mu', ], aes(x = K, y = mse)) +
geom_boxplot() + ggtitle('beta_mu') + facet_grid(disp ~ V) +
geom_hline(yintercept = 0, col = 'red')
ggplot(mse[mse$param == 'beta_mu', ], aes(x = V, y = mse)) +
geom_boxplot() + ggtitle('beta_mu') + facet_grid(disp ~ K) +
geom_hline(yintercept = 0, col = 'red')
ggplot(mse[mse$param == 'beta_pi', ], aes(x = K, y = mse)) +
geom_boxplot() + ggtitle('beta_pi') + facet_grid(disp ~ V) +
geom_hline(yintercept = 0, col = 'red')
ggplot(mse[mse$param == 'beta_pi', ], aes(x = V, y = mse)) +
geom_boxplot() + ggtitle('beta_pi') + facet_grid(disp ~ K) +
geom_hline(yintercept = 0, col = 'red')
ggplot(mse[mse$param == 'gamma_mu', ], aes(x = K, y = mse)) +
geom_boxplot() + ggtitle('gamma_mu') + facet_grid(disp ~ V) +
geom_hline(yintercept = 0, col = 'red')
ggplot(mse[mse$param == 'gamma_mu' & mse$V == 'V', ],
aes(x = K, y = mse)) +
geom_boxplot() + ggtitle('gamma_mu') + facet_grid( ~ disp) +
geom_hline(yintercept = 0, col = 'red')
ggplot(mse[mse$param == 'gamma_pi', ], aes(x = K, y = mse)) +
geom_boxplot() + ggtitle('gamma_pi') + facet_grid(disp ~ V) +
geom_hline(yintercept = 0, col = 'red')
ggplot(mse[mse$param == 'gamma_pi' & mse$V == 'V', ],
aes(x = K, y = mse)) +
geom_boxplot() + ggtitle('gamma_pi') + facet_grid( ~ disp) +
geom_hline(yintercept = 0, col = 'red')
ggplot(mse[mse$param == 'theta', ], aes(x = K, y = mse)) +
geom_boxplot() + ggtitle('theta') + facet_grid(disp ~ V) +
geom_hline(yintercept = 0, col = 'red')
ggplot(mse[mse$param == 'theta', ],
aes(x = disp, y = mse)) +
geom_boxplot() + ggtitle('theta') + facet_grid(V ~ K) +
geom_hline(yintercept = 0, col = 'red')
ggplot(mse[mse$param == 'walpha_mu', ], aes(x = K, y = mse)) +
geom_boxplot() + ggtitle('walpha_mu') + facet_grid(disp ~ V) +
geom_hline(yintercept = 0, col = 'red')
ggplot(mse[mse$param == 'walpha_mu', ],
aes(x = interaction(disp, K, V), y = mse)) +
geom_boxplot() + ggtitle('walpha_mu') + xlab('') +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
geom_hline(yintercept = 0, col = 'red')
ggplot(mse[mse$param == 'walpha_pi', ], aes(x = K, y = mse)) +
geom_boxplot() + ggtitle('walpha_pi') + facet_grid(disp ~ V) +
geom_hline(yintercept = 0, col = 'red')
ggplot(mse[mse$param == 'walpha_pi', ],
aes(x = interaction(disp, K, V), y = mse)) +
geom_boxplot() + ggtitle('walpha_pi') + xlab('') +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
geom_hline(yintercept = 0, col = 'red')
ggplot(mse[mse$param == 'mu', ], aes(x = K, y = mse)) +
geom_boxplot() + ggtitle('mu') + facet_grid(disp ~ V) +
geom_hline(yintercept = 0, col = 'red')
ggplot(mse[mse$param == 'pi', ], aes(x = K, y = mse)) +
geom_boxplot() + ggtitle('pi') + facet_grid(disp ~ V) +
geom_hline(yintercept = 0, col = 'red')
load("./datasets/sim_allen5_dist.rda")
corr5 <- lapply(6:10, function(i) rowMeans(sapply(res, function(x) x[[i]])))
corr5 = do.call(cbind, corr5)
corr5 = data.frame(corr5,stringsAsFactors = F)
colnames(corr5) <- c(paste0("zinb k=", 1:4), "FQ PCA (k=2)")
corr5$pzero = .5
boxplot(corr5[,1:5],border=c(1, 2, 1, 1), xlab="method", ylab="Correlation",
las=2,
main="Correlation between true and estimated sample distances in W\npzero = 0.5")
load("./datasets/sim_allen25_dist.rda")
corr25 <- lapply(6:10, function(i) rowMeans(sapply(res, function(x) x[[i]])))
corr25 = do.call(cbind, corr25)
corr25 = data.frame(corr25,stringsAsFactors = F)
colnames(corr25) <- c(paste0("zinb k=", 1:4), "FQ PCA (k=2)")
corr25$pzero = .25
boxplot(corr25[,1:5], main="Correlation between true and estimated sample distances in W\npzero = 0.25", border=c(1, 2, 1, 1), xlab="method", ylab="Correlation", las=2)
load("./datasets/sim_allen75_dist.rda")
corr75 <- lapply(6:10, function(i) rowMeans(sapply(res, function(x) x[[i]])))
corr75 = do.call(cbind, corr75)
corr75 = data.frame(corr75,stringsAsFactors = F)
colnames(corr75) <- c(paste0("zinb k=", 1:4), "FQ PCA (k=2)")
corr75$pzero = .75
boxplot(corr75[,1:5], main="Correlation between true and estimated sample distances in W\npzero = 0.75", border=c(1, 2, 1, 1), xlab="method", ylab="Correlation", las=2)
corr = rbind(corr5, corr25, corr75)
corr = melt(corr, id.vars = 'pzero')
ggplot(corr, aes(x = variable, y= value, col = variable)) + geom_boxplot() +
facet_grid(~ pzero) + ggtitle('Correlation between true and estimated sample distances in W') + ylab('Correlation') + xlab('Method')
ggplot(corr, aes(x = factor(pzero), y= value, col = factor(pzero))) +
geom_boxplot() + facet_grid(~ variable) + ggtitle('Correlation between true and estimated sample distances in W') + ylab('Correlation') + xlab('Method')
load("./datasets/sim_allen5_dist.rda")
sil5 <- lapply(11:15, function(i) rowMeans(sapply(res, function(x) x[[i]])))
sil5 = do.call(cbind, sil5)
sil5 = data.frame(sil5,stringsAsFactors = F)
colnames(sil5) <- c(paste0("zinb k=", 1:4), "FQ PCA (k=2)")
sil5$pzero = .5
boxplot(sil5[,1:5],border=c(1, 2, 1, 1), xlab="", ylab="Silhouette width",
las=2,
main="Silhouette width of true labels\npzero = 0.5")
load("./datasets/sim_allen25_dist.rda")
sil25 <- lapply(11:15, function(i) rowMeans(sapply(res, function(x) x[[i]])))
sil25 = do.call(cbind, sil25)
sil25 = data.frame(sil25,stringsAsFactors = F)
colnames(sil25) <- c(paste0("zinb k=", 1:4), "FQ PCA (k=2)")
sil25$pzero = .25
boxplot(sil25[,1:5], main="Silhouette width of true labels\npzero = 0.25", border=c(1, 2, 1, 1), xlab="", ylab="Silhouette width", las=2)
load("./datasets/sim_allen75_dist.rda")
sil75 <- lapply(11:15, function(i) rowMeans(sapply(res, function(x) x[[i]])))
sil75 = do.call(cbind, sil75)
sil75 = data.frame(sil75,stringsAsFactors = F)
colnames(sil75) <- c(paste0("zinb k=", 1:4), "FQ PCA (k=2)")
sil75$pzero = .75
boxplot(sil75[,1:5], main="Silhouette width of true labels\npzero = 0.75", border=c(1, 2, 1, 1), xlab="", ylab="Silhouette width", las=2)
sil = rbind(sil25, sil5, sil75)
sil = melt(sil, id.vars = 'pzero')
ggplot(sil, aes(x = variable, y= value, col = variable)) + geom_boxplot() +
facet_grid(~ pzero) + ggtitle('Silhouette width of true labels') + ylab('Silhouette width') + xlab('')
ggplot(sil, aes(x = factor(pzero), y= value, col = factor(pzero))) +
geom_boxplot() + facet_grid(~ variable) + ggtitle('Silhouette width of true labels') + ylab('Silhouette width') + xlab('')
load('./datasets/sim_allen25.rda')
load('./datasets/sim_allen25_fitted.rda')
fit = fittedSim[[2]][[1]][[2]][[1]]
fq <- betweenLaneNormalization(t(simData[[1]]$counts), which="full")
pca <- prcomp(t(log1p(fq)))
par(mfrow=c(1,2))
plot(pca$x, col = biotrue, main = 'PCA\nfZero = 0.25')
plot(fit@W, col = biotrue, main = 'Estimated W, k = 2\nfZero = 0.25',
xlab = 'W1', ylab = 'W2')
load('./datasets/sim_allen5.rda')
load('./datasets/sim_allen5_fitted.rda')
fit = fittedSim[[2]][[1]][[2]][[1]]
fq <- betweenLaneNormalization(t(simData[[1]]$counts), which="full")
pca <- prcomp(t(log1p(fq)))
par(mfrow=c(1,2))
plot(pca$x, col = biotrue, main = 'PCA\nfZero = 0.5')
plot(fit@W, col = biotrue, main = 'Estimated W, k = 2\nfZero = 0.5',
xlab = 'W1', ylab = 'W2')
load('./datasets/sim_allen75.rda')
load('./datasets/sim_allen75_fitted.rda')
fit = fittedSim[[2]][[1]][[2]][[1]]
fq <- betweenLaneNormalization(t(simData[[1]]$counts), which="full")
pca <- prcomp(t(log1p(fq)))
par(mfrow=c(1,2))
plot(pca$x, col = biotrue, main = 'PCA\nfZero = 0.75')
plot(fit@W, col = biotrue, main = 'Estimated W, k = 2\nfZero = 0.75',
xlab = 'W1', ylab = 'W2')
Let’s look at model fit when chosen parameters are correct (V, genewise dispersion, K=2).
computeExp <- function(zinbModel, model = 'zinb'){
if (model == 'zinb'){
(1 - t(getPi(zinbModel))) * t(getMu(zinbModel))
}else{
t(getMu(zinbModel))
}
}
computeVar <- function(zinbModel, model = 'zinb'){
mu = t(getMu(zinbModel))
pi = t(getPi(zinbModel))
phi = exp(-getZeta(zz))
if (model == 'zinb'){
(1 - pi) * mu * (1 + mu*(phi + pi))
}else{
mu * (1 + mu*phi)
}
}
computeP0 <- function(zinbModel, model = 'zinb'){
mu = t(getMu(zinbModel))
pi = t(getPi(zinbModel))
phi = exp(-getZeta(zinbModel))
if (model == 'zinb'){
pi + (1 - pi) * (1 + phi * mu) ^ (-1/phi)
}else{
(1 + phi * mu) ^ (-1/phi)
}
}
plotMD <- function(x, y, xlim = c(0,10), ylim = c(-5, 5),
main = 'ZINB: MD-plot estimated vs. observed mean count, log scale'){
mm = .5*(x + y)
dd = x - y
smoothScatter(mm, dd, xlim = xlim, ylim = ylim, xlab = 'Mean', ylab = 'Diff', main = main)
abline(h = 0, col = 'gray')
fit = loess(dd ~ mm)
xpred = seq(0, 10, .1)
pred = predict(fit, xpred)
lines(xpred, pred, col = 'red', type = 'l', lwd=2)
}
load('./datasets/sim_allen5_fitted.rda')
zz = fittedSim[[2]][[1]][[2]][[1]]
zinbEY = log(rowMeans(computeExp(zz, model = 'zinb')))
nbEY = log(rowMeans(computeExp(zz, model = 'nb')))
logAveCount = log(rowMeans(originalCounts))
prop0 = rowMeans(originalCounts == 0)
zinbPY0 = rowMeans(computeP0(zz, model = 'zinb'))
nbPY0 = rowMeans(computeP0(zz, model = 'nb'))
plotMD(logAveCount, zinbEY, xlim = c(2, 12), ylim = c(-15, 5),
main = 'ZINB: MD-plot estimated vs. observed mean count, log scale')
plotMD(logAveCount, zinbEY, xlim = c(3, 10), ylim = c(-2, 2),
main = 'ZINB: MD-plot estimated vs. observed mean count,\n log scale, zoomed')
plotMD(logAveCount, nbEY, xlim = c(0, 60), ylim = c(-110, 2),
main = 'NB: MD-plot estimated vs. observed mean count, log scale')
## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, :
## Binning grid too coarse for current (small) bandwidth: consider increasing
## 'gridsize'
plotMD(logAveCount, nbEY, xlim = c(3, 10), ylim = c(-5, 5),
main = 'NB: MD-plot estimated vs. observed mean count,\n log scale, zoomed')
MD-plot estimated vs. observed mean count, log scale, zoomed
par(mfrow=c(1,2))
plotMD(logAveCount, zinbEY, xlim = c(3, 10), ylim = c(-5, 5),
main = 'ZINB')
plotMD(logAveCount, nbEY, xlim = c(3, 10), ylim = c(-5, 5),
main = 'NB')
MD-plot estimated vs. observed zero probability
par(mfrow=c(1,2))
plotMD(prop0, zinbPY0, xlim = c(0, 1), ylim = c(-1, 1),
main = 'ZINB')
plotMD(prop0, nbPY0, xlim = c(0, 1), ylim = c(-1, 1),
main = 'NB')
smoothScatter(logAveCount, rowMeans(originalCounts == 0), xlab = 'log average count',
ylab = 'proportion of zeros', main = 'Zero probability versus Mean')
fit = loess(zinbPY0 ~ zinbEY)
xpred = seq(1,10,.1)
pred = predict(fit, xpred)
lines(xpred, pred, col = 'red', type = 'l', lwd=2)
fit = loess(nbPY0 ~ nbEY)
xpred = seq(1,10,.1)
pred = predict(fit, xpred)
lines(xpred, pred, col = 'green', type = 'l', lwd=2)
fit = loess(rowMeans(originalCounts == 0) ~ logAveCount)
xpred = seq(1,10,.1)
pred = predict(fit, xpred)
lines(xpred, pred, col = 'blue', type = 'l', lwd=2)
legend('topright', c('observed', 'zinb', 'nb'),
lty=1, col=c('blue', 'red', 'green'), bty='n', cex=.75)
smoothScatter(rowMeans(originalCounts == 0), exp(-zz@zeta),
xlab = 'Observed zero probability', ylab = 'Estimated dispersion',
main = 'ZINB: dispersion versus zero probability')
fit = loess(exp(-zz@zeta) ~ rowMeans(originalCounts == 0))
xpred = seq(0, 1, .05)
pred = predict(fit, xpred)
lines(xpred, pred, col = 'red', type = 'l', lwd=2)
load('./datasets/sim_allen25_fitted.rda')
zz = fittedSim[[2]][[1]][[2]][[1]]
zinbEY = log(rowMeans(computeExp(zz, model = 'zinb')))
logAveCount = log(rowMeans(originalCounts))
nbEY = log(rowMeans(computeExp(zz, model = 'nb')))
zinbPY0 = rowMeans(computeP0(zz, model = 'zinb'))
prop0 = rowMeans(originalCounts == 0)
nbPY0 = rowMeans(computeP0(zz, model = 'nb'))
MD-plot estimated vs. observed mean count, log scale, zoomed
par(mfrow=c(1,2))
plotMD(logAveCount, zinbEY, xlim = c(3, 10), ylim = c(-5, 5),
main = 'ZINB')
plotMD(logAveCount, nbEY, xlim = c(3, 10), ylim = c(-5, 5),
main = 'NB')
MD-plot estimated vs. observed zero probability
par(mfrow=c(1,2))
plotMD(prop0, zinbPY0, xlim = c(0, 1), ylim = c(-1, 1),
main = 'ZINB')
plotMD(prop0, nbPY0, xlim = c(0, 1), ylim = c(-1, 1),
main = 'NB')
smoothScatter(logAveCount, rowMeans(originalCounts == 0), xlab = 'log average count',
ylab = 'proportion of zeros', main = 'Zero probability versus Mean')
fit = loess(zinbPY0 ~ zinbEY)
xpred = seq(1,10,.1)
pred = predict(fit, xpred)
lines(xpred, pred, col = 'red', type = 'l', lwd=2)
fit = loess(nbPY0 ~ nbEY)
xpred = seq(1,10,.1)
pred = predict(fit, xpred)
lines(xpred, pred, col = 'green', type = 'l', lwd=2)
fit = loess(rowMeans(originalCounts == 0) ~ logAveCount)
xpred = seq(1,10,.1)
pred = predict(fit, xpred)
lines(xpred, pred, col = 'blue', type = 'l', lwd=2)
legend('topright', c('observed', 'zinb', 'nb'),
lty=1, col=c('blue', 'red', 'green'), bty='n', cex=.75)
smoothScatter(rowMeans(originalCounts == 0), exp(-zz@zeta),
xlab = 'Observed zero probability', ylab = 'Estimated dispersion',
main = 'ZINB: dispersion versus zero probability')
fit = loess(exp(-zz@zeta) ~ rowMeans(originalCounts == 0))
xpred = seq(0, 1, .05)
pred = predict(fit, xpred)
lines(xpred, pred, col = 'red', type = 'l', lwd=2)
load('./datasets/sim_allen75_fitted.rda')
zz = fittedSim[[2]][[1]][[2]][[1]]
zinbEY = log(rowMeans(computeExp(zz, model = 'zinb')))
logAveCount = log(rowMeans(originalCounts))
nbEY = log(rowMeans(computeExp(zz, model = 'nb')))
zinbPY0 = rowMeans(computeP0(zz, model = 'zinb'))
prop0 = rowMeans(originalCounts == 0)
nbPY0 = rowMeans(computeP0(zz, model = 'nb'))
MD-plot estimated vs. observed mean count, log scale, zoomed
par(mfrow=c(1,2))
plotMD(logAveCount, zinbEY, xlim = c(2, 10), ylim = c(-15, 15),
main = 'ZINB')
plotMD(logAveCount, nbEY, xlim = c(2, 10), ylim = c(-15, 15),
main = 'NB')
MD-plot estimated vs. observed zero probability
par(mfrow=c(1,2))
plotMD(prop0, zinbPY0, xlim = c(0, 1), ylim = c(-1, 1),
main = 'ZINB')
plotMD(prop0, nbPY0, xlim = c(0, 1), ylim = c(-1, 1),
main = 'NB')
smoothScatter(logAveCount, rowMeans(originalCounts == 0), xlab = 'log average count',
ylab = 'proportion of zeros', main = 'Zero probability versus Mean')
fit = loess(zinbPY0 ~ zinbEY)
xpred = seq(1,10,.1)
pred = predict(fit, xpred)
lines(xpred, pred, col = 'red', type = 'l', lwd=2)
fit = loess(nbPY0 ~ nbEY)
xpred = seq(1,10,.1)
pred = predict(fit, xpred)
lines(xpred, pred, col = 'green', type = 'l', lwd=2)
fit = loess(rowMeans(originalCounts == 0) ~ logAveCount)
xpred = seq(1,10,.1)
pred = predict(fit, xpred)
lines(xpred, pred, col = 'blue', type = 'l', lwd=2)
legend('topright', c('observed', 'zinb', 'nb'),
lty=1, col=c('blue', 'red', 'green'), bty='n', cex=.75)
smoothScatter(rowMeans(originalCounts == 0), exp(-zz@zeta),
xlab = 'Observed zero probability', ylab = 'Estimated dispersion',
main = 'ZINB: dispersion versus zero probability')
fit = loess(exp(-zz@zeta) ~ rowMeans(originalCounts == 0))
xpred = seq(0, 1, .05)
pred = predict(fit, xpred)
lines(xpred, pred, col = 'red', type = 'l', lwd=2)